Start from clean slate and free up memory
Interstate alliance data
## ── Attaching packages ───────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## Warning: package 'ggplot2' was built under R version 3.5.2
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## ── Conflicts ──────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: tergm
## Loading required package: ergm
## Loading required package: network
## Warning: package 'network' was built under R version 3.5.2
## network: Classes for Relational Data
## Version 1.15 created on 2019-04-01.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## ergm: version 3.9.4, created on 2018-08-15
## Copyright (c) 2018, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Martina Morris, University of Washington
## with contributions from
## Li Wang
## Kirk Li, University of Washington
## Skye Bender-deMoll, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
## NOTE: Versions before 3.6.1 had a bug in the implementation of the
## bd() constriant which distorted the sampled distribution somewhat.
## In addition, Sampson's Monks datasets had mislabeled vertices. See
## the NEWS and the documentation for more details.
## Loading required package: networkDynamic
##
## networkDynamic: version 0.9.0, created on 2016-01-12
## Copyright (c) 2016, Carter T. Butts, University of California -- Irvine
## Ayn Leslie-Cook, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Skye Bender-deMoll, University of Washington
## with contributions from
## Zack Almquist, University of California -- Irvine
## David R. Hunter, Penn State University
## Li Wang
## Kirk Li, University of Washington
## Steven M. Goodreau, University of Washington
## Jeffrey Horner
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("networkDynamic").
##
## tergm: version 3.5.2, created on 2018-08-18
## Copyright (c) 2018, Pavel N. Krivitsky, University of Wollongong
## Mark S. Handcock, University of California -- Los Angeles
## with contributions from
## David R. Hunter, Penn State University
## Steven M. Goodreau, University of Washington
## Martina Morris, University of Washington
## Nicole Bohme Carnegie, New York University
## Carter T. Butts, University of California -- Irvine
## Ayn Leslie-Cook, University of Washington
## Skye Bender-deMoll
## Li Wang
## Kirk Li, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("tergm").
## Loading required package: ergm.count
## Loading required package: statnet.common
## Warning: package 'statnet.common' was built under R version 3.5.2
##
## Attaching package: 'statnet.common'
## The following objects are masked from 'package:ergm':
##
## colMeans.mcmc.list, sweep.mcmc.list
## The following object is masked from 'package:base':
##
## order
##
## ergm.count: version 3.3.0, created on 2018-08-25
## Copyright (c) 2018, Pavel N. Krivitsky, University of Wollongong
## with contributions from
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm.count").
## NOTE: The form of the term 'CMP' has been changed in version 3.2
## of 'ergm.count'. See the news or help('CMP') for more information.
## Loading required package: sna
## sna: Tools for Social Network Analysis
## Version 2.4 created on 2016-07-23.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
## Loading required package: tsna
##
## statnet: version 2018.10, created on 2018-10-17
## Copyright (c) 2018, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Skye Bender-deMoll
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("statnet").
## unable to reach CRAN
## [1] "net" "labels"
# 'net' is the adjacency matrix
# 'labels' is a vector of country names
# Initialize a Network
AllyNet <- network(allylist$net, directed=F)
# Add the states' names
network.vertex.names(AllyNet) <- allylist$labels
# Plot the Network
gplot(AllyNet, label = network.vertex.names(AllyNet), gmode="graph")# A more readable plot
gplot(AllyNet, label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, gmode="graph")# Leave the isolates out?
gplot(AllyNet, label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, gmode="graph", displayisolates=F)Senate Co-Sponsorship data
# Read in the cosponsorship data
senlist <- dget("./data/sennet.txt")
# Initialize a Network
SenNet <- network(senlist$net)
# Add the senators' names
network.vertex.names(SenNet) <- senlist$labels
# Plot the Network
gplot(SenNet, label = network.vertex.names(SenNet))# Edges
# Thin the Network
SenNet2 <- network(senlist$net > 10)
# Look at it Again
gplot(SenNet2, label = network.vertex.names(SenNet), edge.col="grey55", label.cex=.75)Campnet data
campnet <- read.csv(file="./data/campnet.csv", header=T, row.names=1, as.is=T)
campnet.attr <- read.csv(file="./data/campnet-attr.csv", header=T, as.is=T)## Network attributes:
## vertices = 18
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges = 54
## missing edges = 0
## non-missing edges = 54
## density = 0.1764706
##
## Vertex attributes:
## vertex.names:
## character valued attribute
## 18 valid vertex names
##
## No edge attributes
##
## Network edgelist matrix:
## [,1] [,2]
## [1,] 5 1
## [2,] 9 1
## [3,] 12 1
## [4,] 14 1
## [5,] 11 2
## [6,] 5 3
## [7,] 7 3
## [8,] 1 4
## [9,] 3 4
## [10,] 6 4
## [11,] 7 4
## [12,] 8 4
## [13,] 1 5
## [14,] 3 5
## [15,] 6 5
## [16,] 7 5
## [17,] 4 6
## [18,] 5 6
## [19,] 8 6
## [20,] 3 7
## [21,] 4 7
## [22,] 8 7
## [23,] 13 7
## [24,] 4 8
## [25,] 6 8
## [26,] 10 9
## [27,] 12 9
## [28,] 14 9
## [29,] 15 9
## [30,] 2 11
## [31,] 16 11
## [32,] 17 11
## [33,] 1 12
## [34,] 9 12
## [35,] 10 12
## [36,] 14 12
## [37,] 9 14
## [38,] 10 14
## [39,] 12 14
## [40,] 13 15
## [41,] 18 15
## [42,] 2 16
## [43,] 11 16
## [44,] 15 16
## [45,] 17 16
## [46,] 18 16
## [47,] 2 17
## [48,] 11 17
## [49,] 16 17
## [50,] 18 17
## [51,] 13 18
## [52,] 15 18
## [53,] 16 18
## [54,] 17 18
set.vertex.attribute(camp.net, "vertex.names", campnet.attr$Name) # Add name as a node attribute
set.vertex.attribute(camp.net, "Gender", campnet.attr$Gender) # Add gender as a node attribute
set.vertex.attribute(camp.net, "Role", campnet.attr$Role) # Add role as a node attribute
list.vertex.attributes(camp.net)## [1] "Gender" "na" "Role" "vertex.names"
## Network attributes:
## vertices = 18
## directed = TRUE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges = 54
## missing edges = 0
## non-missing edges = 54
## density = 0.1764706
##
## Vertex attributes:
##
## Gender:
## integer valued attribute
## 18 values
##
## Role:
## integer valued attribute
## 18 values
## vertex.names:
## character valued attribute
## 18 valid vertex names
##
## No edge attributes
##
## Network edgelist matrix:
## [,1] [,2]
## [1,] 5 1
## [2,] 9 1
## [3,] 12 1
## [4,] 14 1
## [5,] 11 2
## [6,] 5 3
## [7,] 7 3
## [8,] 1 4
## [9,] 3 4
## [10,] 6 4
## [11,] 7 4
## [12,] 8 4
## [13,] 1 5
## [14,] 3 5
## [15,] 6 5
## [16,] 7 5
## [17,] 4 6
## [18,] 5 6
## [19,] 8 6
## [20,] 3 7
## [21,] 4 7
## [22,] 8 7
## [23,] 13 7
## [24,] 4 8
## [25,] 6 8
## [26,] 10 9
## [27,] 12 9
## [28,] 14 9
## [29,] 15 9
## [30,] 2 11
## [31,] 16 11
## [32,] 17 11
## [33,] 1 12
## [34,] 9 12
## [35,] 10 12
## [36,] 14 12
## [37,] 9 14
## [38,] 10 14
## [39,] 12 14
## [40,] 13 15
## [41,] 18 15
## [42,] 2 16
## [43,] 11 16
## [44,] 15 16
## [45,] 17 16
## [46,] 18 16
## [47,] 2 17
## [48,] 11 17
## [49,] 16 17
## [50,] 18 17
## [51,] 13 18
## [52,] 15 18
## [53,] 16 18
## [54,] 17 18
# In this and other functions, mode is "digraph" for directed
# and "graph" for symmetric networks.
gden(camp.net, mode="digraph")## [1] 0.1764706
## Mut
## 19
# Reciprocity - proportion of symmetric dyads
# dyadic - ratio of dyads where (i,j)==(j,i) to all dyads
# dyadic.nonnull - ratio of dyads where (i,j)==1 AND (j,i)==1 to all dyads where (i,j)==1
# edgewise - ratio of edges that are reciprocated to all edges where (i,j)==1
grecip(camp.net, measure="dyadic")## Mut
## 0.8954248
## Mut
## 0.5428571
## Mut
## 0.7037037
# Geodesic distances
# By default, nodes that cannot reach each other have a geodesic distance of Inf
# remember, Inf is the constant for infinity.
# Here we'll replace it with the longest theoretically possible path length
# in the network + 1, so 18
camp.geo <- geodist(camp.net, inf.replace=18)
camp.geo## $counts
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [2,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [3,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [4,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [5,] 1 0 1 3 1 1 1 1 1 0 0 1 0
## [6,] 1 0 1 1 1 1 2 1 1 0 0 1 0
## [7,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [8,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [9,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [10,] 3 0 3 3 3 6 3 3 1 1 0 1 0
## [11,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [12,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [13,] 2 3 1 1 1 2 1 1 1 0 3 1 1
## [14,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [15,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [16,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [17,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [18,] 1 2 1 1 1 2 1 1 1 0 2 1 0
## [,14] [,15] [,16] [,17] [,18]
## [1,] 1 0 0 0 0
## [2,] 2 2 1 1 2
## [3,] 1 0 0 0 0
## [4,] 2 0 0 0 0
## [5,] 1 0 0 0 0
## [6,] 1 0 0 0 0
## [7,] 1 0 0 0 0
## [8,] 2 0 0 0 0
## [9,] 1 0 0 0 0
## [10,] 1 0 0 0 0
## [11,] 2 2 1 1 2
## [12,] 1 0 0 0 0
## [13,] 1 1 2 1 1
## [14,] 1 0 0 0 0
## [15,] 1 1 1 2 1
## [16,] 1 1 1 1 1
## [17,] 1 1 1 1 1
## [18,] 1 1 1 1 1
##
## $gdist
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 18 2 1 1 2 2 2 2 18 18 1 18
## [2,] 5 0 7 6 6 7 7 7 4 18 1 5 18
## [3,] 2 18 0 1 1 2 1 2 4 18 18 3 18
## [4,] 3 18 2 0 2 1 1 1 5 18 18 4 18
## [5,] 1 18 1 2 0 1 2 2 3 18 18 2 18
## [6,] 2 18 2 1 1 0 2 1 4 18 18 3 18
## [7,] 2 18 1 1 1 2 0 2 4 18 18 3 18
## [8,] 3 18 2 1 2 1 1 0 5 18 18 4 18
## [9,] 1 18 3 2 2 3 3 3 0 18 18 1 18
## [10,] 2 18 4 3 3 4 4 4 1 0 18 1 18
## [11,] 5 1 7 6 6 7 7 7 4 18 0 5 18
## [12,] 1 18 3 2 2 3 3 3 1 18 18 0 18
## [13,] 3 4 2 2 2 3 1 3 2 18 3 3 0
## [14,] 1 18 3 2 2 3 3 3 1 18 18 1 18
## [15,] 2 3 4 3 3 4 4 4 1 18 2 2 18
## [16,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [17,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [18,] 3 3 5 4 4 5 5 5 2 18 2 3 18
## [,14] [,15] [,16] [,17] [,18]
## [1,] 2 18 18 18 18
## [2,] 5 3 1 1 2
## [3,] 4 18 18 18 18
## [4,] 5 18 18 18 18
## [5,] 3 18 18 18 18
## [6,] 4 18 18 18 18
## [7,] 4 18 18 18 18
## [8,] 5 18 18 18 18
## [9,] 1 18 18 18 18
## [10,] 1 18 18 18 18
## [11,] 5 3 1 1 2
## [12,] 1 18 18 18 18
## [13,] 3 1 2 2 1
## [14,] 0 18 18 18 18
## [15,] 2 0 1 2 1
## [16,] 4 2 0 1 1
## [17,] 4 2 1 0 1
## [18,] 3 1 1 1 0
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 18 2 1 1 2 2 2 2 18 18 1 18
## [2,] 5 0 7 6 6 7 7 7 4 18 1 5 18
## [3,] 2 18 0 1 1 2 1 2 4 18 18 3 18
## [4,] 3 18 2 0 2 1 1 1 5 18 18 4 18
## [5,] 1 18 1 2 0 1 2 2 3 18 18 2 18
## [6,] 2 18 2 1 1 0 2 1 4 18 18 3 18
## [7,] 2 18 1 1 1 2 0 2 4 18 18 3 18
## [8,] 3 18 2 1 2 1 1 0 5 18 18 4 18
## [9,] 1 18 3 2 2 3 3 3 0 18 18 1 18
## [10,] 2 18 4 3 3 4 4 4 1 0 18 1 18
## [11,] 5 1 7 6 6 7 7 7 4 18 0 5 18
## [12,] 1 18 3 2 2 3 3 3 1 18 18 0 18
## [13,] 3 4 2 2 2 3 1 3 2 18 3 3 0
## [14,] 1 18 3 2 2 3 3 3 1 18 18 1 18
## [15,] 2 3 4 3 3 4 4 4 1 18 2 2 18
## [16,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [17,] 4 2 6 5 5 6 6 6 3 18 1 4 18
## [18,] 3 3 5 4 4 5 5 5 2 18 2 3 18
## [,14] [,15] [,16] [,17] [,18]
## [1,] 2 18 18 18 18
## [2,] 5 3 1 1 2
## [3,] 4 18 18 18 18
## [4,] 5 18 18 18 18
## [5,] 3 18 18 18 18
## [6,] 4 18 18 18 18
## [7,] 4 18 18 18 18
## [8,] 5 18 18 18 18
## [9,] 1 18 18 18 18
## [10,] 1 18 18 18 18
## [11,] 5 3 1 1 2
## [12,] 1 18 18 18 18
## [13,] 3 1 2 2 1
## [14,] 0 18 18 18 18
## [15,] 2 0 1 2 1
## [16,] 4 2 0 1 1
## [17,] 4 2 1 0 1
## [18,] 3 1 1 1 0
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [2,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [3,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [4,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [5,] 1 0 1 3 1 1 1 1 1 0 0 1 0
## [6,] 1 0 1 1 1 1 2 1 1 0 0 1 0
## [7,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [8,] 2 0 1 1 2 1 1 1 2 0 0 2 0
## [9,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [10,] 3 0 3 3 3 6 3 3 1 1 0 1 0
## [11,] 2 1 2 2 2 4 2 2 2 0 1 2 0
## [12,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [13,] 2 3 1 1 1 2 1 1 1 0 3 1 1
## [14,] 1 0 1 1 1 2 1 1 1 0 0 1 0
## [15,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [16,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [17,] 1 1 1 1 1 2 1 1 1 0 1 1 0
## [18,] 1 2 1 1 1 2 1 1 1 0 2 1 0
## [,14] [,15] [,16] [,17] [,18]
## [1,] 1 0 0 0 0
## [2,] 2 2 1 1 2
## [3,] 1 0 0 0 0
## [4,] 2 0 0 0 0
## [5,] 1 0 0 0 0
## [6,] 1 0 0 0 0
## [7,] 1 0 0 0 0
## [8,] 2 0 0 0 0
## [9,] 1 0 0 0 0
## [10,] 1 0 0 0 0
## [11,] 2 2 1 1 2
## [12,] 1 0 0 0 0
## [13,] 1 1 2 1 1
## [14,] 1 0 0 0 0
## [15,] 1 1 1 2 1
## [16,] 1 1 1 1 1
## [17,] 1 1 1 1 1
## [18,] 1 1 1 1 1
## Average Distance/Path length
camp.graph <- igraph::graph_from_adjacency_matrix(as.matrix(campnet))
igraph::average.path.length(camp.graph) # Average distance of 2.8## [1] 2.873786
## [1] 7
## [1] 7
# How many connected components do we have in the network? components()
# connected ="strong" means v1 and v2 are connected if there is a directed path
# from v1 to v2 and from v2 to v1.
# connected = "weak" means v1 and v2 are connected if there is a semi-path
# (i.e. path ignoring the link direction) from v1 to v2 and v2 to v1.
# Number of components:
sna::components(camp.net, connected="strong")## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
## [1] 4
## [1] 1
## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
## [1] 0.2222222
# Which node belongs to which component? component.dist()
camp.comp <- component.dist(camp.net, connected="strong")## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
## $membership
## [1] 1 2 1 1 1 1 1 1 1 3 2 1 4 1 2 2 2 2
##
## $csize
## [1] 10 6 1 1
##
## $cdist
## [1] 2 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
## [1] 1 2 1 1 1 1 1 1 1 3 2 1 4 1 2 2 2 2
## [1] 10 6 1 1
## [1] 2 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0
Which nodes in the network, if removed, will increase the number of components we find?
## [1] 1 5 11 12 18
# Let's remove one of the cutpoints and count components again.
camp.net.cut <- camp.net[-1,-1] # remember "-1" selects all bit the first row/column.
# So camp.net.cut will be camp.net with node 1 removed.
components(camp.net.cut, connected="strong")## Node 1, Reach 9, Total 9
## Node 2, Reach 6, Total 15
## Node 3, Reach 6, Total 21
## Node 4, Reach 6, Total 27
## Node 5, Reach 6, Total 33
## Node 6, Reach 6, Total 39
## Node 7, Reach 6, Total 45
## Node 8, Reach 3, Total 48
## Node 9, Reach 4, Total 52
## Node 10, Reach 9, Total 61
## Node 11, Reach 3, Total 64
## Node 12, Reach 16, Total 80
## Node 13, Reach 3, Total 83
## Node 14, Reach 9, Total 92
## Node 15, Reach 9, Total 101
## Node 16, Reach 9, Total 110
## Node 17, Reach 9, Total 119
## [1] 5
# Fragmentation (Distance weighted)
## Function to calculate distance weighted fragmentation in R
fragmentation.dw <- function(matrix){
reciprocal <- matrix(1, nrow(matrix), ncol(matrix))/geodist(matrix)$gdist
reciprocal[reciprocal==Inf] <- 0
fit <- 1 - ((2*sum(reciprocal[lower.tri(reciprocal)]))/(nrow(matrix)*(nrow(matrix)-1)))
return(fit)
}
fragmentation.dw(campnet)## [1] 0.6095549
# Reachability
# Vertices which are adjacent in the reachability graph are connected by one or more
# directed paths in the original graph; thus, structural equivalence classes in the
# reachability graph are synonymous with strongly connected components in the original structure.
reachability.matrix <- reachability(camp.net)## Node 1, Reach 10, Total 10
## Node 2, Reach 16, Total 26
## Node 3, Reach 10, Total 36
## Node 4, Reach 10, Total 46
## Node 5, Reach 10, Total 56
## Node 6, Reach 10, Total 66
## Node 7, Reach 10, Total 76
## Node 8, Reach 10, Total 86
## Node 9, Reach 10, Total 96
## Node 10, Reach 11, Total 107
## Node 11, Reach 16, Total 123
## Node 12, Reach 10, Total 133
## Node 13, Reach 17, Total 150
## Node 14, Reach 10, Total 160
## Node 15, Reach 16, Total 176
## Node 16, Reach 16, Total 192
## Node 17, Reach 16, Total 208
## Node 18, Reach 16, Total 224
# Let's go ahead and symmetrize camp.net by setting the attribute
# "directed" to F: this will add links (j,i) wherever (i,j) exists.
# You could symmetrize by minimum using symmmetrize() with rule="strong"
camp.sym <- camp.net
camp.sym <- set.network.attribute(camp.sym, "directed", F)
# The clique census returns a list with several important elements - let's assign
# that list to an object we'll call camp.net.cliques.
# The clique.comembership parameter takes values "none" (no co-membership is computed),
# "sum" (the total number of shared cliques for each pair of nodes is computed),
# or "bysize" (separate clique co-membership is computed for each clique size)
camp.net.cliques <- clique.census(camp.sym, mode = "graph", clique.comembership="sum")
camp.net.cliques # an object that now contains the results of the clique census## $clique.count
## Agg BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 2 5 2 0 0 1 2 1 1 0 1 0 0 0
## 3 7 0 0 2 3 1 1 3 2 0 0 0 0
## 4 3 1 1 0 0 0 0 0 0 2 1 1 2
## PAM PAT RUSS GERY MICHAEL HOLLY
## 1 0 0 0 0 0 0
## 2 1 0 1 0 0 0
## 3 1 0 2 2 1 3
## 4 0 2 0 1 1 0
##
## $clique.comemb
## BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## BRAZEY 3 0 0 1 1 0 0 0 1 0 0 1
## BILL 0 1 0 0 0 0 0 0 0 0 1 0
## JOHN 0 0 2 1 1 0 2 0 0 0 0 0
## ANN 1 0 1 4 0 1 2 2 0 0 0 0
## CAROL 1 0 1 0 3 1 1 0 0 0 0 0
## HARRY 0 0 0 1 1 2 0 1 0 0 0 0
## LEE 0 0 2 2 1 0 4 1 0 0 0 0
## JENNIE 0 0 0 2 0 1 1 2 0 0 0 0
## PAULINE 1 0 0 0 0 0 0 0 3 1 0 2
## BERT 0 0 0 0 0 0 0 0 1 1 0 1
## DON 0 1 0 0 0 0 0 0 0 0 1 0
## STEVE 1 0 0 0 0 0 0 0 2 1 0 2
## PAM 0 0 0 0 0 0 1 0 0 0 0 0
## PAT 1 0 0 0 0 0 0 0 2 1 0 2
## RUSS 0 0 0 0 0 0 0 0 1 0 0 0
## GERY 0 1 0 0 0 0 0 0 0 0 1 0
## MICHAEL 0 1 0 0 0 0 0 0 0 0 1 0
## HOLLY 0 0 0 0 0 0 0 0 0 0 0 0
## PAM PAT RUSS GERY MICHAEL HOLLY
## BRAZEY 0 1 0 0 0 0
## BILL 0 0 0 1 1 0
## JOHN 0 0 0 0 0 0
## ANN 0 0 0 0 0 0
## CAROL 0 0 0 0 0 0
## HARRY 0 0 0 0 0 0
## LEE 1 0 0 0 0 0
## JENNIE 0 0 0 0 0 0
## PAULINE 0 2 1 0 0 0
## BERT 0 1 0 0 0 0
## DON 0 0 0 1 1 0
## STEVE 0 2 0 0 0 0
## PAM 2 0 1 0 0 1
## PAT 0 2 0 0 0 0
## RUSS 1 0 3 1 0 2
## GERY 0 0 1 3 2 2
## MICHAEL 0 0 0 2 2 1
## HOLLY 1 0 2 2 1 3
##
## $cliques
## $cliques[[1]]
## NULL
##
## $cliques[[2]]
## $cliques[[2]][[1]]
## [1] 9 15
##
## $cliques[[2]][[2]]
## [1] 5 6
##
## $cliques[[2]][[3]]
## [1] 1 5
##
## $cliques[[2]][[4]]
## [1] 7 13
##
## $cliques[[2]][[5]]
## [1] 1 4
##
##
## $cliques[[3]]
## $cliques[[3]][[1]]
## [1] 15 16 18
##
## $cliques[[3]][[2]]
## [1] 4 6 8
##
## $cliques[[3]][[3]]
## [1] 13 15 18
##
## $cliques[[3]][[4]]
## [1] 4 7 8
##
## $cliques[[3]][[5]]
## [1] 3 5 7
##
## $cliques[[3]][[6]]
## [1] 3 4 7
##
## $cliques[[3]][[7]]
## [1] 16 17 18
##
##
## $cliques[[4]]
## $cliques[[4]][[1]]
## [1] 9 10 12 14
##
## $cliques[[4]][[2]]
## [1] 1 9 12 14
##
## $cliques[[4]][[3]]
## [1] 2 11 16 17
# The first element of the result list is clique.count: a matrix containing the number of cliques of different sizes (size = number of nodes in the clique).The first column (named Agg) gives you the total number of cliqies of each size,the rest of the columns show the number of cliques each node participates in.
#
# Note that this includes cliques of sizes 1 & 2. We have those when the largest
# fully connected structure includes just 1 or 2 nodes.
camp.net.cliques$clique.count## Agg BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## 2 5 2 0 0 1 2 1 1 0 1 0 0 0
## 3 7 0 0 2 3 1 1 3 2 0 0 0 0
## 4 3 1 1 0 0 0 0 0 0 2 1 1 2
## PAM PAT RUSS GERY MICHAEL HOLLY
## 1 0 0 0 0 0 0
## 2 1 0 1 0 0 0
## 3 1 0 2 2 1 3
## 4 0 2 0 1 1 0
## BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE BERT DON STEVE
## BRAZEY 3 0 0 1 1 0 0 0 1 0 0 1
## BILL 0 1 0 0 0 0 0 0 0 0 1 0
## JOHN 0 0 2 1 1 0 2 0 0 0 0 0
## ANN 1 0 1 4 0 1 2 2 0 0 0 0
## CAROL 1 0 1 0 3 1 1 0 0 0 0 0
## HARRY 0 0 0 1 1 2 0 1 0 0 0 0
## LEE 0 0 2 2 1 0 4 1 0 0 0 0
## JENNIE 0 0 0 2 0 1 1 2 0 0 0 0
## PAULINE 1 0 0 0 0 0 0 0 3 1 0 2
## BERT 0 0 0 0 0 0 0 0 1 1 0 1
## DON 0 1 0 0 0 0 0 0 0 0 1 0
## STEVE 1 0 0 0 0 0 0 0 2 1 0 2
## PAM 0 0 0 0 0 0 1 0 0 0 0 0
## PAT 1 0 0 0 0 0 0 0 2 1 0 2
## RUSS 0 0 0 0 0 0 0 0 1 0 0 0
## GERY 0 1 0 0 0 0 0 0 0 0 1 0
## MICHAEL 0 1 0 0 0 0 0 0 0 0 1 0
## HOLLY 0 0 0 0 0 0 0 0 0 0 0 0
## PAM PAT RUSS GERY MICHAEL HOLLY
## BRAZEY 0 1 0 0 0 0
## BILL 0 0 0 1 1 0
## JOHN 0 0 0 0 0 0
## ANN 0 0 0 0 0 0
## CAROL 0 0 0 0 0 0
## HARRY 0 0 0 0 0 0
## LEE 1 0 0 0 0 0
## JENNIE 0 0 0 0 0 0
## PAULINE 0 2 1 0 0 0
## BERT 0 1 0 0 0 0
## DON 0 0 0 1 1 0
## STEVE 0 2 0 0 0 0
## PAM 2 0 1 0 0 1
## PAT 0 2 0 0 0 0
## RUSS 1 0 3 1 0 2
## GERY 0 0 1 3 2 2
## MICHAEL 0 0 0 2 2 1
## HOLLY 1 0 2 2 1 3
# The third element of the clique census result is a list of all found cliques:
# (Remember that a list can have another list as its element)
camp.net.cliques$cliques # a full list of cliques, all sizes## [[1]]
## NULL
##
## [[2]]
## [[2]][[1]]
## [1] 9 15
##
## [[2]][[2]]
## [1] 5 6
##
## [[2]][[3]]
## [1] 1 5
##
## [[2]][[4]]
## [1] 7 13
##
## [[2]][[5]]
## [1] 1 4
##
##
## [[3]]
## [[3]][[1]]
## [1] 15 16 18
##
## [[3]][[2]]
## [1] 4 6 8
##
## [[3]][[3]]
## [1] 13 15 18
##
## [[3]][[4]]
## [1] 4 7 8
##
## [[3]][[5]]
## [1] 3 5 7
##
## [[3]][[6]]
## [1] 3 4 7
##
## [[3]][[7]]
## [1] 16 17 18
##
##
## [[4]]
## [[4]][[1]]
## [1] 9 10 12 14
##
## [[4]][[2]]
## [1] 1 9 12 14
##
## [[4]][[3]]
## [1] 2 11 16 17
## NULL
## [[1]]
## [1] 9 15
##
## [[2]]
## [1] 5 6
##
## [[3]]
## [1] 1 5
##
## [[4]]
## [1] 7 13
##
## [[5]]
## [1] 1 4
## [[1]]
## [1] 15 16 18
##
## [[2]]
## [1] 4 6 8
##
## [[3]]
## [1] 13 15 18
##
## [[4]]
## [1] 4 7 8
##
## [[5]]
## [1] 3 5 7
##
## [[6]]
## [1] 3 4 7
##
## [[7]]
## [1] 16 17 18
## [[1]]
## [1] 9 10 12 14
##
## [[2]]
## [1] 1 9 12 14
##
## [[3]]
## [1] 2 11 16 17
## BRAZEY BILL JOHN ANN CAROL HARRY LEE JENNIE PAULINE
## 3 3 3 3 3 3 3 3 3
## BERT DON STEVE PAM PAT RUSS GERY MICHAEL HOLLY
## 3 3 3 3 3 3 3 3 3
# igraph object of campnet
camp.graph <- igraph::graph_from_adjacency_matrix(as.matrix(campnet), mode="directed")
## Girvan-Newman edge-betweenness method
# It calulates the edge betweenness of the network, removes the edge with the highest betweenness,
# checks to see if it broke the network into components, if it broke then it calculates modularity.
# It keeps doing this until modularity starts to decrease. It works on the assumption that edges
# with high betweenness are bridging communities together.
com <-igraph::edge.betweenness.community(camp.graph)
# WARNING: Do not assign this value to a vertex property called membership that attribute is reserved.
# It will mess up different aspects of your analysis and visualization.
# Just assign it to something like m or memb as I’ve done here.
igraph::V(camp.graph)$memb <- com$membership
# Check modularity score
igraph::modularity(com)## [1] 0.569273
# Blockmodeling is the Classical SNA Approach
# Goal is to group nodes based on structural equivalence
# Run a block model, identifying the number of clusters
ALLYbm <- blockmodel(AllyNet, equiv.clust(AllyNet), k=6)
# Create Vector for block membership, and fill
bmem <- numeric(length(ALLYbm$block.content))
bmem[ALLYbm$order.vec] <- ALLYbm$block.membership
bcols <- c("black","white","red","blue","yellow","green")
# Take a look at them
gplot(AllyNet, label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, gmode="graph", displayisolates=F, vertex.col=bcols[bmem])# Try it with 8 communities
ALLYbm <- blockmodel(AllyNet, equiv.clust(AllyNet), k=8)
bmem <- numeric(length(ALLYbm$block.content))
bmem[ALLYbm$order.vec] <- ALLYbm$block.membership
bcols <- c("black","white","red","blue","yellow", "grey50","brown","green")
gplot(AllyNet, label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, gmode="graph", displayisolates=F, vertex.col=bcols[bmem])\[ \text{Modularity} = \frac{1}{2m}\frac{\sum_{ij}[A_{ij}-k_ik_j]}{2m}1(c_ic_j) \]
where,
# Modularity-based community detection popular in physics
# Modularity = Dense within communities, sparse across
library(igraph)
# Convert into a graph
AllyGR <- graph.adjacency(AllyNet,mode="undirected")
mem0 <- bmem-1 # first group is 0 in igraph
mem <- leading.eigenvector.community.step(AllyGR, membership=mem0)$membership
# Loop so membership stays the same
i <- 0
while(mean(mem0==mem)!=1){
mem0 <- mem
mem <- leading.eigenvector.community.step(AllyGR, membership=mem0)$membership
i <- i +1
}
# Get memberships and plot
mem <- mem +1
detach("package:igraph")
bcols <- c("black","white","red","blue","yellow", "grey50","brown","green", "pink", "orange")
gplot(AllyNet, label = network.vertex.names(AllyNet),edge.col="grey55", label.cex=.75, gmode="graph", displayisolates=F, vertex.col=bcols[mem])
# How Many Communities? Try mode = mds, eigen, geodist, rmds
gplot(SenNet2, label = network.vertex.names(SenNet), edge.col="grey55", label.cex=.75, displayisolates=F, mode="geodist")
# Run a blockmodel
SENbm <- blockmodel(SenNet2, equiv.clust(SenNet2), k=6)
# Membership
bmem <- numeric(length(SENbm$block.content))
bmem[SENbm$order.vec] <- SENbm$block.membership
bcols <- c("black","white","red","blue","yellow", "grey50")
# Plot
gplot(SenNet2, label = network.vertex.names(SenNet), edge.col="grey55", label.cex=.75, displayisolates=F, mode="geodist", vertex.col=bcols[bmem])### Functions
kpp.neg <- function(x,k,n.sim){
if (length(rownames(x)) == 0) rownames(x) <- c(1:nrow(x))
if (length(colnames(x)) == 0) colnames(x) <- c(1:ncol(x))
if (missing(n.sim)) n.sim <- 100
sims <- array(0, dim=c(n.sim, 2))
for (sim in 1:n.sim){
# Populate set S with 3 random nodes, without replacement
set <- strsplit(sample(colnames(x), k, replace = FALSE, prob = NULL), " ")
# Remove nodes from network x and calculate F = fit
matrix <- x[-which(rownames(x) %in% set), -which(colnames(x) %in% set)]
reciprocal <- matrix(1, nrow(matrix), ncol(matrix))/geodist(matrix)$gdist
reciprocal[reciprocal==Inf] <- 0
fit <- 1 - ((2*sum(reciprocal[lower.tri(reciprocal)]))/(nrow(matrix)*(nrow(matrix)-1)))
non.set <- strsplit(rownames(matrix), " ")
pairs <- array(0, dim=c(k*length(non.set), 2))
#colnames(pairs) <- c("Pair", "Delta_F")
terminate <- FALSE
while (terminate==FALSE){
i <- 1
for (u in set){
for (v in non.set){
pairs[i,1] <- paste(set[[which(set %in% u)]],non.set[[which(non.set %in% v)]],sep=',')
set.new <- set
set.new[[which(set %in% u)]] <- non.set[[which(non.set %in% v)]]
matrix <- x[-which(rownames(x) %in% set.new), -which(colnames(x) %in% set.new)]
reciprocal <- matrix(1, nrow(matrix), ncol(matrix))/geodist(matrix)$gdist
reciprocal[reciprocal==Inf] <- 0
fit.new <- 1 - ((2*sum(reciprocal[lower.tri(reciprocal)]))/(nrow(matrix)*(nrow(matrix)-1)))
delta.fit <- fit.new - fit
pairs[i,2] <- delta.fit
i <- i+1
}
}
if (length(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1])>1){
all.pairs <- strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",") # All pairs that improve fit equally
pair.max <- strsplit(sample(all.pairs, 1, replace=FALSE)[[1]], " ")
} else {pair.max <- strsplit(strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",")[[1]]," ")}
terminate <- max(as.numeric(pairs[,2]))<=0
if (max(as.numeric(pairs[,2]))>0){
fit <- fit + max(as.numeric(pairs[,2]))
set[[which(set %in% pair.max)]] <- pair.max[[which(!(pair.max %in% set))]]
matrix <- x[-which(rownames(x) %in% set), -which(colnames(x) %in% set)]
non.set <- strsplit(rownames(matrix), " ")
pairs <- array(0, dim=c(k*length(non.set), 2))
colnames(pairs) <- c("Pair", "Delta_F")
}
}
output <- data.frame(optimal_set=paste("[",paste(as.character(set), collapse=","),"]", sep=''),
max_fragmentation=fit)
sims[[sim,1]] <- as.character(output$optimal_set)
sims[[sim,2]] <- output$max_fragmentation
}
count <- table(sims[,1][sims[,2]==max(as.numeric(sims[,2]))])
output <- data.frame(optimal_set=sample(names(count)[count==max(count)], 1, replace=FALSE),
max_fragmentation=max(as.numeric(sims[,2])))
return(output)
}
kpp.pos <- function(x,k,n.sim){
if (missing(n.sim)) n.sim <- 100
sims <- array(0, dim=c(n.sim, 4))
for (sim in 1:n.sim){
# Sub-function
colmax <- function(matrix) apply(matrix, 2, max)
# Main code
if (length(rownames(x)) == 0) rownames(x) <- c(1:nrow(x))
if (length(colnames(x)) == 0) colnames(x) <- c(1:ncol(x))
set <- strsplit(sample(colnames(x), k, replace = FALSE, prob = NULL), " ")
reciprocal <- matrix(1, nrow(x), ncol(x))/geodist(x)$gdist
reciprocal[reciprocal==Inf] <- 1
rownames(reciprocal) <- rownames(x)
colnames(reciprocal) <- colnames(x)
non.set <- strsplit(rownames(x[-which(rownames(x) %in% set), -which(colnames(x) %in% set)]), " ")
fit <- matrix(0,1,3)
fit[1,1] <- (sum(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% strsplit(colnames(x), " "))]) ))/ncol(x)
fit[1,2] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])==1))
fit[1,3] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])>0))/ncol(x)
#fit.alt <- (sum(colmax(reciprocal[which(rownames(reciprocal) %in% set),which(colnames(reciprocal) %in% non.set)])))/length(non.set)
pairs <- array(0, dim=c(k*length(non.set), 4))
terminate <- FALSE
while (terminate==FALSE){
i <- 1
for (u in set){
for (v in non.set){
pairs[i,1] <- paste(set[[which(set %in% u)]],non.set[[which(non.set %in% v)]],sep=',')
set.new <- set
set.new[[which(set %in% u)]] <- non.set[[which(non.set %in% v)]]
non.set.new <- non.set
non.set.new[[which(non.set %in% v)]] <- set[[which(set %in% u)]]
fit.new <- matrix(0,1,3)
fit.new[1,1] <- (sum(colmax(reciprocal[which(rownames(reciprocal) %in% set.new),
which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])))/ncol(x)
fit.new[1,2] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set.new),
which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])==1))
fit.new[1,3] <- length(which(colmax(reciprocal[which(rownames(reciprocal) %in% set.new),
which(colnames(reciprocal) %in% strsplit(colnames(x), " "))])>0))/ncol(x)
delta.fit <- fit.new[1,1] - fit[1,1]
pairs[i, 2] <- delta.fit
pairs[i, 3] <- fit.new[,2]
pairs[i, 4] <- fit.new[,3]
i <- i+1
}
}
if (length(pairs[which( as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1]) > 1){ # Do we have multiple optimal pairs?
all.pairs <- strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",") # All pairs that improve fit equally
pair.max <- strsplit(sample(all.pairs, 1, replace=FALSE)[[1]], " ")
} else {pair.max <- strsplit(strsplit(pairs[which(as.numeric(pairs[,2]) == max(as.numeric(pairs[,2]))),1], ",")[[1]]," ")}
terminate <- max(as.numeric(pairs[,2]))<=0 # terminate if delta.fit <= 0
if (max(as.numeric(pairs[,2]))>0){
fit[1,1] <- as.numeric(fit[1,1] + max(as.numeric(pairs[,2])))
fit[1,2] <- as.numeric(pairs[which(pairs[,1] == paste(pair.max, collapse=",")),3])
fit[1,3] <- as.numeric(pairs[which(pairs[,1] == paste(pair.max, collapse=",")),4])
set[[which(set %in% pair.max)]] <- pair.max[[which(!(pair.max %in% set))]]
non.set[[which(non.set %in% pair.max)]] <- pair.max[[which(!(pair.max %in% non.set))]]
pairs <- array(0, dim=c(k*length(non.set), 4))
colnames(pairs) <- c("Pair", "Delta_F", "Nodes", "Net_Prop")
}
}
output <- data.frame(optimal_set=paste("[",paste(as.character(set), collapse=","),"]", sep=''),
distance_w_reach=fit[,1], nodes = fit[,2], net_prop = fit[,3] )
sims[[sim,1]] <- as.character(output$optimal_set)
sims[[sim,2]] <- output$distance_w_reach
sims[[sim,3]] <- output$nodes
sims[[sim,4]] <- output$net_prop
}
count <- table(sims[,1][sims[,2]==max(as.numeric(sims[,2]))])
output <- data.frame(optimal_set=sample(names(count)[count==max(count)], 1, replace=FALSE),
distance_w_reach=max(as.numeric(sims[,2])), nodes = 0, net_prop = 0)
output$nodes <- as.numeric(sample(sims[,3][sims[,1] == as.character(output$optimal_set)], 1, replace=FALSE))
output$net_prop <- as.numeric(sample(sims[,4][sims[,1] == as.character(output$optimal_set)], 1, replace=FALSE))
return(output)
}Now assume the network is stochastic; We might have hypotheses about the stochastic process:
- Nodes that are similar are likely to form ties: Homophily
- Directed edges are likely to be reciprocated: Reciprocity
- A friend of a friend is a friend: Transitivity
What we have done thus far treats the network as fixed. This distinction defines the traditional divide between social and mathematical sciences approaches to network analysis.
Autocorrelation is a measure of how much those near to us influence our outcomes. This is easiest to understand in terms of time. Over time, the value of a particular stock - or any other thing of value - is very likely related to the value it held yesterday. We can observe similar situations with respect to physical proximity. The wealth of particular cities can be related to the wealth of the cities that are adjacent to them. Although it is possible for a wealthy city to be adjacent to much poorer cities, it is also likely that any increases in the wealth of a particular city can cause spillovers into neighboring communities. The spillover may not be always be direct, but there is likely to be a relationship there. Network autocorrelation works in much the same way. Though, rather than using physical proximity, network autocorrelation uses network proximity to test whether an individuals’ continuous attributes are related to those of their neighbors. In other words, we can ask quesitons about how a particular attribute appears to be related to the ties within that network. For example: Does wealth attract wealth? Do people with dissimilar wealth tend to form ties? Or, does wealth appear to have little to do with one’s immediate neighborhood?
Moran’s I ranges from -1 to 1 and is interpreted like Pearson’s Correlation Coefficient.
Geary’s C ranges from 0 to 2.
data(florentine)
# Compute the network correlation:
# Test the correlation using qaptest:
# qaptest(A.List.Of.Networks, Some.Network.Function, reps=100)
# where reps is the number of graphs to be generated.
# Out of a 100 permutations, how many had >= or <= correlation than the observed value?
# (the parameters g1=1 g2=2 tell the test to compare the first & second element of
# the list of networks that is the first argument of qaptest)
flo.qap <- qaptest(list(flomarriage,flobusiness), gcor, g1=1, g2=2, reps=100)
flo.qap##
## QAP Test Results
##
## Estimated p-values:
## p(f(perm) >= f(d)): 0
## p(f(perm) <= f(d)): 1
## Import data
AHS_Base <- rio::import('./data/ahs_wpvar.csv')
AHS_adjlist <- AHS_Base %>%
select(ego_nid, mfnid_1:mfnid_5, ffnid_1:ffnid_5, grade, sex, PSMOKES,commcnt) %>%
filter(commcnt==1);
AHS_Edges <- AHS_adjlist %>%
rename( id = `ego_nid`,
gender = `sex`) %>%
gather(Alter_Label, Target, mfnid_1:mfnid_5, ffnid_1:ffnid_5, na.rm = TRUE)
AHS_Edges=AHS_Edges %>% filter (Target != 99999);
AHS_Edges=AHS_Edges %>%select(id, Target);
# Compute attributes
library(statnet)
g=as.network(AHS_Edges)
g %v% "grade" <- AHS_adjlist$grade
g %v% "sex" <- AHS_adjlist$sex
g %v% "smokes" <- AHS_adjlist$PSMOKES
# get degree scores
outdegree <- degree(g, cmode = "outdegree")
indegree<- degree(g, cmode = "indegree")
g %v% "indegree" <- indegree
g %v% "outdegree" <- outdegree
g %v% "degree" <- degree(g)
plot(g, vertex.col="grade")## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges
##
## Iterations: 6 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -2.7275 0.0591 0 -46.15 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 2293 on 4969 degrees of freedom
##
## AIC: 2295 BIC: 2302 (Smaller is better.)
#interpretation of model coefficients:
#the log-odds of any tie existing is -2.7275
#probability: exp(-2.7275)/(1+exp(-2.7275))=.0613
exp(-2.7275)/(1+exp(-2.7275))## [1] 0.06137001
## [1] 0.06136821
# Add some covariates...
mod.homoph<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')) ## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes")
##
## Iterations: 7 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -4.5787 0.1850 0 -24.752 < 1e-04 ***
## nodematch.sex 0.4621 0.1311 0 3.524 0.000425 ***
## nodematch.grade 3.0493 0.1422 0 21.446 < 1e-04 ***
## nodematch.smokes 0.4062 0.1483 0 2.738 0.006174 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1712 on 4966 degrees of freedom
##
## AIC: 1720 BIC: 1746 (Smaller is better.)
#add some simple volume bitscovariates...
mod.p1<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+sender+receiver) ## Observed statistic(s) sender4, sender5, sender15, sender17, sender42, sender57, sender63, receiver4, receiver17, and receiver44 are at their smallest attainable values. Their coefficients will be fixed at -Inf.
## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Stopping at the initial estimate.
## Evaluating log-likelihood at the estimate.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## sender + receiver
##
## Iterations: 7 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -6.54235 1.39273 0 -4.698 <1e-04 ***
## nodematch.sex 0.72340 0.15509 0 4.664 <1e-04 ***
## nodematch.grade 3.60634 0.17189 0 20.981 <1e-04 ***
## nodematch.smokes 1.27256 0.20774 0 6.126 <1e-04 ***
## sender2 2.22577 1.25583 0 1.772 0.0763 .
## sender3 0.85358 1.31365 0 0.650 0.5158
## sender4 -Inf 0.00000 0 -Inf <1e-04 ***
## sender5 -Inf 0.00000 0 -Inf <1e-04 ***
## sender6 1.17264 1.29221 0 0.907 0.3642
## sender7 1.82007 1.28618 0 1.415 0.1570
## sender8 2.33419 1.27155 0 1.836 0.0664 .
## sender9 2.03254 1.27211 0 1.598 0.1101
## sender10 -0.12412 1.41704 0 -0.088 0.9302
## sender11 -0.74302 1.60190 0 -0.464 0.6428
## sender12 -1.01392 1.57899 0 -0.642 0.5208
## sender13 1.44915 1.30637 0 1.109 0.2673
## sender14 1.33064 1.32471 0 1.004 0.3152
## sender15 -Inf 0.00000 0 -Inf <1e-04 ***
## sender16 2.34335 1.25453 0 1.868 0.0618 .
## sender17 -Inf 0.00000 0 -Inf <1e-04 ***
## sender18 1.99117 1.29061 0 1.543 0.1229
## sender19 -0.07322 1.40150 0 -0.052 0.9583
## sender20 1.52207 1.28439 0 1.185 0.2360
## sender21 2.09979 1.26439 0 1.661 0.0968 .
## sender22 0.88576 1.34555 0 0.658 0.5104
## sender23 1.46870 1.28751 0 1.141 0.2540
## sender24 0.88576 1.34555 0 0.658 0.5104
## sender25 1.25861 1.34235 0 0.938 0.3484
## sender26 1.34868 1.27622 0 1.057 0.2906
## sender27 0.51817 1.35188 0 0.383 0.7015
## sender28 0.30073 1.35208 0 0.222 0.8240
## sender29 2.43662 1.28473 0 1.897 0.0579 .
## sender30 1.36028 1.32210 0 1.029 0.3035
## sender31 1.88595 1.28043 0 1.473 0.1408
## sender32 0.72578 1.32209 0 0.549 0.5830
## sender33 2.09444 1.25897 0 1.664 0.0962 .
## sender34 2.92916 1.26585 0 2.314 0.0207 *
## sender35 1.48498 1.28646 0 1.154 0.2484
## sender36 2.04011 1.27129 0 1.605 0.1085
## sender37 1.05055 1.32819 0 0.791 0.4290
## sender38 2.06804 1.28481 0 1.610 0.1075
## sender39 1.03670 1.40765 0 0.736 0.4614
## sender40 1.71434 1.26586 0 1.354 0.1756
## sender41 -0.86750 1.56951 0 -0.553 0.5805
## sender42 -Inf 0.00000 0 -Inf <1e-04 ***
## sender43 1.97972 1.28468 0 1.541 0.1233
## sender44 1.84499 1.28992 0 1.430 0.1526
## sender45 0.37849 1.35551 0 0.279 0.7801
## sender46 1.20985 1.30077 0 0.930 0.3523
## sender47 -0.07322 1.40150 0 -0.052 0.9583
## sender48 0.83024 1.30801 0 0.635 0.5256
## sender49 1.82994 1.29996 0 1.408 0.1592
## sender50 0.39527 1.35435 0 0.292 0.7704
## sender51 2.59763 1.28047 0 2.029 0.0425 *
## sender52 2.39671 1.28765 0 1.861 0.0627 .
## sender53 0.74770 1.30858 0 0.571 0.5677
## sender54 0.63491 1.34110 0 0.473 0.6359
## sender55 0.87978 1.41662 0 0.621 0.5346
## sender56 1.10414 1.30234 0 0.848 0.3965
## sender57 -Inf 0.00000 0 -Inf <1e-04 ***
## sender58 2.32900 1.29162 0 1.803 0.0714 .
## sender59 0.96191 1.31592 0 0.731 0.4648
## sender60 1.20635 1.29933 0 0.928 0.3532
## sender61 2.62317 1.27827 0 2.052 0.0402 *
## sender62 -0.79590 1.57732 0 -0.505 0.6138
## sender63 -Inf 0.00000 0 -Inf <1e-04 ***
## sender64 1.37381 1.28712 0 1.067 0.2858
## sender65 2.34097 1.29990 0 1.801 0.0717 .
## sender66 2.35099 1.28906 0 1.824 0.0682 .
## sender67 0.63782 1.41165 0 0.452 0.6514
## sender68 0.77762 1.32296 0 0.588 0.5567
## sender69 1.78102 1.27294 0 1.399 0.1618
## sender70 0.09125 1.43162 0 0.064 0.9492
## sender71 0.86733 1.45067 0 0.598 0.5499
## receiver2 -1.43489 1.07362 0 -1.337 0.1814
## receiver3 -1.69160 1.06305 0 -1.591 0.1115
## receiver4 -Inf 0.00000 0 -Inf <1e-04 ***
## receiver5 -0.79045 0.97874 0 -0.808 0.4193
## receiver6 -1.66543 1.06255 0 -1.567 0.1170
## receiver7 -0.66723 0.95563 0 -0.698 0.4851
## receiver8 0.22141 0.91166 0 0.243 0.8081
## receiver9 0.36172 0.86484 0 0.418 0.6758
## receiver10 -0.87903 0.95032 0 -0.925 0.3550
## receiver11 -1.25341 1.00146 0 -1.252 0.2107
## receiver12 -2.50486 1.27755 0 -1.961 0.0499 *
## receiver13 -1.65921 1.08480 0 -1.529 0.1261
## receiver14 -0.19915 0.93421 0 -0.213 0.8312
## receiver15 -0.21006 0.90663 0 -0.232 0.8168
## receiver16 -1.03081 0.99540 0 -1.036 0.3004
## receiver17 -Inf 0.00000 0 -Inf <1e-04 ***
## receiver18 -1.00664 1.02771 0 -0.979 0.3273
## receiver19 -0.94528 0.92624 0 -1.021 0.3075
## receiver20 0.13942 0.88488 0 0.158 0.8748
## receiver21 -1.59147 1.08360 0 -1.469 0.1419
## receiver22 -1.73458 1.09012 0 -1.591 0.1116
## receiver23 -0.12357 0.88993 0 -0.139 0.8896
## receiver24 -1.73458 1.09012 0 -1.591 0.1116
## receiver25 -0.42157 0.98180 0 -0.429 0.6676
## receiver26 -1.22260 0.96921 0 -1.261 0.2072
## receiver27 0.32993 0.87520 0 0.377 0.7062
## receiver28 -2.47186 1.27910 0 -1.932 0.0533 .
## receiver29 0.47522 0.91342 0 0.520 0.6029
## receiver30 0.12959 0.91004 0 0.142 0.8868
## receiver31 0.51347 0.87020 0 0.590 0.5551
## receiver32 -1.66313 1.06639 0 -1.560 0.1189
## receiver33 0.31993 0.87070 0 0.367 0.7133
## receiver34 1.66643 0.85103 0 1.958 0.0502 .
## receiver35 0.12686 0.87443 0 0.145 0.8846
## receiver36 0.57683 0.85120 0 0.678 0.4980
## receiver37 -0.54970 0.94518 0 -0.582 0.5608
## receiver38 0.20867 0.91396 0 0.228 0.8194
## receiver39 -0.30300 0.98849 0 -0.307 0.7592
## receiver40 -1.12114 0.97563 0 -1.149 0.2505
## receiver41 -1.32436 0.96960 0 -1.366 0.1720
## receiver42 0.75364 0.87979 0 0.857 0.3917
## receiver43 -0.14799 0.92850 0 -0.159 0.8734
## receiver44 -Inf 0.00000 0 -Inf <1e-04 ***
## receiver45 -1.25725 0.99393 0 -1.265 0.2059
## receiver46 -0.15188 0.90265 0 -0.168 0.8664
## receiver47 -0.94528 0.92624 0 -1.021 0.3075
## receiver48 -0.34516 0.87911 0 -0.393 0.6946
## receiver49 -0.28730 0.97578 0 -0.294 0.7684
## receiver50 -0.47568 0.91303 0 -0.521 0.6024
## receiver51 1.82942 0.84156 0 2.174 0.0297 *
## receiver52 0.59043 0.90601 0 0.652 0.5146
## receiver53 -1.74439 1.05338 0 -1.656 0.0977 .
## receiver54 0.41471 0.84789 0 0.489 0.6248
## receiver55 -1.61996 1.28711 0 -1.259 0.2082
## receiver56 -1.72440 1.07485 0 -1.604 0.1086
## receiver57 -1.70193 1.08087 0 -1.575 0.1153
## receiver58 0.05836 0.94460 0 0.062 0.9507
## receiver59 -0.63772 0.95194 0 -0.670 0.5029
## receiver60 0.34174 0.86306 0 0.396 0.6921
## receiver61 1.19676 0.87435 0 1.369 0.1711
## receiver62 -1.68211 1.08277 0 -1.554 0.1203
## receiver63 -0.43936 1.06709 0 -0.412 0.6805
## receiver64 -2.49890 1.28600 0 -1.943 0.0520 .
## receiver65 -0.37488 1.01075 0 -0.371 0.7107
## receiver66 0.68781 0.90364 0 0.761 0.4466
## receiver67 -0.02045 0.94255 0 -0.022 0.9827
## receiver68 -0.77370 0.94078 0 -0.822 0.4108
## receiver69 0.15329 0.88341 0 0.174 0.8622
## receiver70 -1.75929 1.08335 0 -1.624 0.1044
## receiver71 -0.47914 1.11343 0 -0.430 0.6670
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 2292 on 4826 degrees of freedom
##
## AIC: 2580 BIC: 3517 (Smaller is better.)
##
## Warning: The following terms have infinite coefficient estimates:
## sender4 sender5 sender15 sender17 sender42 sender57 sender63 receiver4 receiver17 receiver44
#model predicts better, but huge BIC cost! Simplify?
g %v% "indegree" <- indegree
g %v% "outdegree" <- outdegree
mod.p2<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual)## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 20:
## Optimizing with step length 0.815023636105832.
## The log-likelihood improved by 3.28.
## Iteration 2 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.3939.
## Step length converged once. Increasing MCMC sample size.
## Iteration 3 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.04178.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual
##
## Iterations: 3 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.36350 0.38406 0 -21.776 <1e-04 ***
## nodematch.sex 0.53097 0.13152 0 4.037 <1e-04 ***
## nodematch.grade 2.60895 0.16372 0 15.936 <1e-04 ***
## nodematch.smokes 0.87707 0.16218 0 5.408 <1e-04 ***
## nodeicov.indegree 0.29141 0.02938 0 9.918 <1e-04 ***
## nodeocov.outdegree 0.33089 0.03420 0 9.674 <1e-04 ***
## mutual 2.33517 0.23219 0 10.057 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1322 on 4963 degrees of freedom
##
## AIC: 1336 BIC: 1381 (Smaller is better.)
mod.gendmut<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual(same="sex",diff=TRUE))## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 20:
## Optimizing with step length 0.795084745962182.
## The log-likelihood improved by 2.986.
## Iteration 2 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.4863.
## Step length converged once. Increasing MCMC sample size.
## Iteration 3 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.04361.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual(same = "sex",
## diff = TRUE)
##
## Iterations: 3 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.18804 0.38840 0 -21.081 <1e-04 ***
## nodematch.sex -0.15083 0.17796 0 -0.848 0.397
## nodematch.grade 2.83825 0.16214 0 17.505 <1e-04 ***
## nodematch.smokes 0.95409 0.15506 0 6.153 <1e-04 ***
## nodeicov.indegree 0.30766 0.02858 0 10.763 <1e-04 ***
## nodeocov.outdegree 0.34895 0.03465 0 10.071 <1e-04 ***
## mutual.same.sex.1 2.80250 0.38101 0 7.355 <1e-04 ***
## mutual.same.sex.2 2.51012 0.33447 0 7.505 <1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1340 on 4962 degrees of freedom
##
## AIC: 1356 BIC: 1408 (Smaller is better.)
#let's try a transitivity term...
mod.tran<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual+triangle, control=control.ergm(MCMLE.maxit=2))## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 2:
## Optimizing with step length 0.490824434226245.
## The log-likelihood improved by 8.98.
## Iteration 2 of at most 2:
## Optimizing with step length 0.
## The log-likelihood improved by < 0.0001.
## MCMLE estimation did not converge after 2 iterations. The estimated coefficients may not be accurate. Estimation may be resumed by passing the coefficients as initial values; see 'init' under ?control.ergm for details.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual + triangle
##
## Iterations: 2 out of 2
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.447329 0.411812 11 -20.513 < 1e-04 ***
## nodematch.sex 0.695422 0.567867 1 1.225 0.22072
## nodematch.grade 2.099324 0.726925 1 2.888 0.00388 **
## nodematch.smokes 0.779930 0.691021 1 1.129 0.25904
## nodeicov.indegree 0.277257 0.196627 1 1.410 0.15852
## nodeocov.outdegree 0.326232 0.177501 1 1.838 0.06607 .
## mutual 2.414219 0.264785 16 9.118 < 1e-04 ***
## triangle 0.104111 0.001974 5 52.739 < 1e-04 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1348 on 4962 degrees of freedom
##
## AIC: 1364 BIC: 1416 (Smaller is better.)
## Sample statistics summary:
##
## Iterations = 16384:1063936
## Thinning interval = 1024
## Number of chains = 1
## Sample size per chain = 1024
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges -4287.7 56.985 1.78079 54.7227
## nodematch.sex -2090.1 26.753 0.83603 23.0149
## nodematch.grade -604.9 2.069 0.06467 0.6487
## nodematch.smokes -3011.7 46.767 1.46147 44.8705
## nodeicov.indegree -18412.3 182.011 5.68784 164.6501
## nodeocov.outdegree -18510.7 183.198 5.72495 164.6910
## mutual -2209.4 28.394 0.88732 28.1258
## triangle -404607.5 7628.479 238.38996 7742.9516
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -4393.0 -4278 -4256 -4254 -4252
## nodematch.sex -2140.0 -2086 -2075 -2074 -2072
## nodematch.grade -609.4 -606 -604 -603 -603
## nodematch.smokes -3098.0 -3004 -2985 -2984 -2982
## nodeicov.indegree -18745.0 -18393 -18310 -18301 -18294
## nodeocov.outdegree -18855.0 -18490 -18409 -18400 -18390
## mutual -2262.0 -2197 -2193 -2193 -2193
## triangle -418517.0 -400856 -400285 -400281 -400279
##
##
## Sample statistics cross-correlations:
## edges nodematch.sex nodematch.grade
## edges 1.0000000 0.9992766 0.8580457
## nodematch.sex 0.9992766 1.0000000 0.8600494
## nodematch.grade 0.8580457 0.8600494 1.0000000
## nodematch.smokes 0.9997922 0.9989568 0.8529524
## nodeicov.indegree 0.9994724 0.9992095 0.8577262
## nodeocov.outdegree 0.9995827 0.9988004 0.8587801
## mutual 0.9967828 0.9950799 0.8496788
## triangle 0.9968457 0.9949700 0.8452099
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## edges 0.9997922 0.9994724 0.9995827
## nodematch.sex 0.9989568 0.9992095 0.9988004
## nodematch.grade 0.8529524 0.8577262 0.8587801
## nodematch.smokes 1.0000000 0.9992027 0.9992443
## nodeicov.indegree 0.9992027 1.0000000 0.9986497
## nodeocov.outdegree 0.9992443 0.9986497 1.0000000
## mutual 0.9969793 0.9951137 0.9960971
## triangle 0.9971082 0.9951650 0.9960478
## mutual triangle
## edges 0.9967828 0.9968457
## nodematch.sex 0.9950799 0.9949700
## nodematch.grade 0.8496788 0.8452099
## nodematch.smokes 0.9969793 0.9971082
## nodeicov.indegree 0.9951137 0.9951650
## nodeocov.outdegree 0.9960971 0.9960478
## mutual 1.0000000 0.9998005
## triangle 0.9998005 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## edges nodematch.sex nodematch.grade nodematch.smokes
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.9978822 0.9973618 0.9588703 0.9978785
## Lag 2048 0.9956097 0.9946214 0.9174921 0.9956189
## Lag 3072 0.9932108 0.9918648 0.8849958 0.9932406
## Lag 4096 0.9906572 0.9890740 0.8572930 0.9906926
## Lag 5120 0.9879263 0.9860803 0.8325374 0.9879866
## nodeicov.indegree nodeocov.outdegree mutual triangle
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.9976138 0.9975838 0.9980094 0.9981041
## Lag 2048 0.9951048 0.9950928 0.9959369 0.9960488
## Lag 3072 0.9925363 0.9924803 0.9937697 0.9938370
## Lag 4096 0.9898620 0.9897839 0.9914486 0.9914494
## Lag 5120 0.9870176 0.9869136 0.9889869 0.9889109
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges nodematch.sex nodematch.grade
## 0.8060 0.8677 2.7930
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## 0.7953 0.8363 0.8493
## mutual triangle
## 0.7576 0.7353
##
## Individual P-values (lower = worse):
## edges nodematch.sex nodematch.grade
## 0.420238633 0.385536607 0.005221836
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## 0.426455867 0.403004913 0.395702309
## mutual triangle
## 0.448679531 0.462131207
## Joint P-value (lower = worse): 0.9728799 .
## Warning in formals(fun): argument is not a function
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
## Warning: You appear to be calling simulate.ergm() directly. simulate.ergm()
## is a method, and will not be exported in a future version of 'ergm'. Use
## simulate() instead, or getS3method() if absolutely necessary.
## Warning: You appear to be calling simulate.formula() directly.
## simulate.formula() is a method, and will not be exported in a future
## version of 'ergm'. Use simulate() instead, or getS3method() if absolutely
## necessary.
mod.gwesp<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual+gwesp)## Starting maximum pseudolikelihood estimation (MPLE):
## Evaluating the predictor and response matrix.
## Maximizing the pseudolikelihood.
## Finished MPLE.
## Starting Monte Carlo maximum likelihood estimation (MCMLE):
## Iteration 1 of at most 20:
## Optimizing with step length 0.439769576145639.
## The log-likelihood improved by 3.137.
## Iteration 2 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 2.209.
## Step length converged once. Increasing MCMC sample size.
## Iteration 3 of at most 20:
## Optimizing with step length 1.
## The log-likelihood improved by 0.09391.
## Step length converged twice. Stopping.
## Finished MCMLE.
## Evaluating log-likelihood at the estimate. Using 20 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 .
## This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
##
## ==========================
## Summary of model fit
## ==========================
##
## Formula: g ~ edges + nodematch("sex") + nodematch("grade") + nodematch("smokes") +
## nodeicov("indegree") + nodeocov("outdegree") + mutual + gwesp
##
## Iterations: 3 out of 20
##
## Monte Carlo MLE Results:
## Estimate Std. Error MCMC % z value Pr(>|z|)
## edges -8.04077 0.37598 0 -21.386 <1e-04 ***
## nodematch.sex 0.51263 0.11927 0 4.298 <1e-04 ***
## nodematch.grade 2.18991 0.17145 0 12.773 <1e-04 ***
## nodematch.smokes 0.78448 0.13915 0 5.637 <1e-04 ***
## nodeicov.indegree 0.24020 0.02940 0 8.169 <1e-04 ***
## nodeocov.outdegree 0.27260 0.03537 0 7.708 <1e-04 ***
## mutual 2.06821 0.24307 0 8.509 <1e-04 ***
## gwesp 0.64689 0.15128 0 4.276 <1e-04 ***
## gwesp.decay 0.07613 0.16819 0 0.453 0.651
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Null Deviance: 6890 on 4970 degrees of freedom
## Residual Deviance: 1297 on 4961 degrees of freedom
##
## AIC: 1315 BIC: 1374 (Smaller is better.)
## Sample statistics summary:
##
## Iterations = 16384:4209664
## Thinning interval = 1024
## Number of chains = 1
## Sample size per chain = 4096
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## edges -4.525 19.030 0.2973 0.9957
## nodematch.sex -1.023 14.591 0.2280 0.8572
## nodematch.grade -2.803 14.785 0.2310 0.9135
## nodematch.smokes -3.635 15.717 0.2456 0.8375
## nodeicov.indegree -17.785 112.436 1.7568 5.7673
## nodeocov.outdegree -22.485 109.066 1.7042 5.6454
## mutual -1.917 7.664 0.1197 0.4814
## gwesp -5.175 22.742 0.3553 1.2982
## gwesp.decay -2.991 13.736 0.2146 0.8135
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## edges -41.62 -17.00 -4.000 8.000 33.62
## nodematch.sex -30.00 -11.00 -1.000 9.000 28.00
## nodematch.grade -30.00 -13.00 -3.000 7.000 27.00
## nodematch.smokes -34.00 -14.00 -4.000 7.000 27.00
## nodeicov.indegree -237.62 -92.00 -16.000 53.000 205.00
## nodeocov.outdegree -239.00 -93.00 -22.000 51.000 195.00
## mutual -17.00 -7.00 -2.000 3.000 13.00
## gwesp -48.52 -19.87 -5.566 9.968 40.12
## gwesp.decay -29.92 -12.19 -3.044 6.279 23.96
##
##
## Sample statistics cross-correlations:
## edges nodematch.sex nodematch.grade
## edges 1.0000000 0.8005102 0.8247141
## nodematch.sex 0.8005102 1.0000000 0.6253056
## nodematch.grade 0.8247141 0.6253056 1.0000000
## nodematch.smokes 0.8567774 0.6686600 0.7240669
## nodeicov.indegree 0.9302022 0.7428550 0.7006378
## nodeocov.outdegree 0.9527578 0.7427972 0.7428459
## mutual 0.7851500 0.6380697 0.7801156
## gwesp 0.9228793 0.7241971 0.8373366
## gwesp.decay 0.7919423 0.5932845 0.8082424
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## edges 0.8567774 0.9302022 0.9527578
## nodematch.sex 0.6686600 0.7428550 0.7427972
## nodematch.grade 0.7240669 0.7006378 0.7428459
## nodematch.smokes 1.0000000 0.7345028 0.7772568
## nodeicov.indegree 0.7345028 1.0000000 0.8986153
## nodeocov.outdegree 0.7772568 0.8986153 1.0000000
## mutual 0.6848388 0.7197478 0.7428481
## gwesp 0.7802764 0.8778279 0.9000905
## gwesp.decay 0.6718997 0.7611554 0.7829299
## mutual gwesp gwesp.decay
## edges 0.7851500 0.9228793 0.7919423
## nodematch.sex 0.6380697 0.7241971 0.5932845
## nodematch.grade 0.7801156 0.8373366 0.8082424
## nodematch.smokes 0.6848388 0.7802764 0.6718997
## nodeicov.indegree 0.7197478 0.8778279 0.7611554
## nodeocov.outdegree 0.7428481 0.9000905 0.7829299
## mutual 1.0000000 0.8162420 0.7640932
## gwesp 0.8162420 1.0000000 0.8727561
## gwesp.decay 0.7640932 0.8727561 1.0000000
##
## Sample statistics auto-correlation:
## Chain 1
## edges nodematch.sex nodematch.grade nodematch.smokes
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.7269258 0.7361147 0.8195312 0.7226876
## Lag 2048 0.5829042 0.5983578 0.7019531 0.5770559
## Lag 3072 0.4947217 0.5179058 0.6068046 0.4938609
## Lag 4096 0.4394357 0.4639378 0.5457865 0.4378630
## Lag 5120 0.3872358 0.4139731 0.4935546 0.3836537
## nodeicov.indegree nodeocov.outdegree mutual gwesp
## Lag 0 1.0000000 1.0000000 1.0000000 1.0000000
## Lag 1024 0.7207850 0.7259825 0.8326302 0.7717094
## Lag 2048 0.5719512 0.5823544 0.7193619 0.6481079
## Lag 3072 0.4867342 0.4903281 0.6297919 0.5649531
## Lag 4096 0.4278588 0.4310962 0.5612675 0.5089659
## Lag 5120 0.3751412 0.3801334 0.4970629 0.4523930
## gwesp.decay
## Lag 0 1.0000000
## Lag 1024 0.7727372
## Lag 2048 0.6475194
## Lag 3072 0.5631358
## Lag 4096 0.5010415
## Lag 5120 0.4502251
##
## Sample statistics burn-in diagnostic (Geweke):
## Chain 1
##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## edges nodematch.sex nodematch.grade
## -0.5564 -0.6961 -0.5742
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## -0.2424 -0.6078 -0.4281
## mutual gwesp gwesp.decay
## -0.5744 -0.6288 -0.7071
##
## Individual P-values (lower = worse):
## edges nodematch.sex nodematch.grade
## 0.5779188 0.4863770 0.5658610
## nodematch.smokes nodeicov.indegree nodeocov.outdegree
## 0.8084409 0.5433314 0.6685615
## mutual gwesp gwesp.decay
## 0.5656642 0.5294929 0.4794825
## Joint P-value (lower = worse): 0.9413868 .
## Warning in formals(fun): argument is not a function
##
## MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
##
## Goodness-of-fit for in-degree
##
## obs min mean max MC p-value
## 0 3 4 8.26 15 0.00
## 1 4 4 8.16 16 0.08
## 2 15 2 8.79 17 0.04
## 3 11 3 9.28 17 0.50
## 4 10 3 8.17 14 0.60
## 5 4 1 6.40 14 0.44
## 6 9 1 5.25 10 0.10
## 7 6 0 4.03 11 0.48
## 8 5 0 3.69 9 0.56
## 9 1 0 3.03 7 0.32
## 10 1 0 2.03 5 0.70
## 11 1 0 1.25 4 1.00
## 12 1 0 0.91 3 1.00
## 13 0 0 0.51 2 1.00
## 14 0 0 0.35 3 1.00
## 15 0 0 0.32 2 1.00
## 16 0 0 0.11 1 1.00
## 17 0 0 0.16 2 1.00
## 18 0 0 0.15 2 1.00
## 19 0 0 0.08 1 1.00
## 20 0 0 0.03 1 1.00
## 21 0 0 0.03 1 1.00
## 23 0 0 0.01 1 1.00
##
## Goodness-of-fit for out-degree
##
## obs min mean max MC p-value
## 0 7 5 9.88 15 0.38
## 1 5 2 7.31 15 0.46
## 2 8 2 8.28 15 1.00
## 3 7 2 7.59 13 1.00
## 4 9 3 7.43 14 0.54
## 5 9 1 7.21 14 0.62
## 6 12 1 6.04 10 0.00
## 7 6 1 5.03 10 0.76
## 8 5 0 3.72 9 0.58
## 9 2 0 2.99 7 0.72
## 10 1 0 1.89 5 0.82
## 11 0 0 1.12 5 0.64
## 12 0 0 0.76 3 0.92
## 13 0 0 0.65 3 1.00
## 14 0 0 0.47 2 1.00
## 15 0 0 0.23 2 1.00
## 16 0 0 0.18 1 1.00
## 17 0 0 0.09 2 1.00
## 18 0 0 0.08 1 1.00
## 19 0 0 0.01 1 1.00
## 20 0 0 0.03 1 1.00
## 23 0 0 0.01 1 1.00
##
## Goodness-of-fit for edgewise shared partner
##
## obs min mean max MC p-value
## esp0 64 43 61.64 82 0.80
## esp1 96 62 95.20 127 0.98
## esp2 63 51 68.03 92 0.68
## esp3 54 24 42.40 66 0.20
## esp4 16 6 22.10 41 0.38
## esp5 9 1 10.36 21 0.84
## esp6 1 0 4.14 9 0.12
## esp7 2 0 1.55 6 0.92
## esp8 0 0 0.55 4 1.00
## esp9 0 0 0.16 1 1.00
## esp10 0 0 0.09 1 1.00
##
## Goodness-of-fit for minimum geodesic distance
##
## obs min mean max MC p-value
## 1 305 254 306.22 349 0.94
## 2 606 639 886.24 1133 0.00
## 3 840 856 1290.84 1578 0.00
## 4 863 595 856.21 1144 1.00
## 5 639 62 281.74 531 0.00
## 6 414 2 60.14 237 0.00
## 7 244 0 10.70 119 0.00
## 8 182 0 1.48 53 0.00
## 9 112 0 0.09 4 0.00
## 10 51 0 0.00 0 0.00
## 11 28 0 0.00 0 0.00
## 12 5 0 0.00 0 0.00
## Inf 681 678 1276.34 1995 0.02
##
## Goodness-of-fit for model statistics
##
## obs min mean max MC p-value
## edges 305 254 306.22 349 0.94
## nodematch.sex 180 148 179.07 226 1.00
## nodematch.grade 233 193 234.80 267 0.82
## nodematch.smokes 230 198 229.51 275 0.98
## nodeicov.indegree 1807 1522 1821.48 2072 0.84
## nodeocov.outdegree 1777 1514 1786.96 2058 0.98
## mutual 85 67 85.70 104 1.00
## esp#1 96 62 95.20 127 0.98
## esp#2 63 51 68.03 92 0.68
## esp#3 54 24 42.40 66 0.20
## esp#4 16 6 22.10 41 0.38
## esp#5 9 1 10.36 21 0.84
## esp#6 1 0 4.14 9 0.12
## esp#7 2 0 1.55 6 0.92
## esp#8 0 0 0.55 4 1.00
## esp#9 0 0 0.16 1 1.00
## esp#10 0 0 0.09 1 1.00
These models allow us to fit a network without having to specify the actual tie formation patterns. The most user-friendly latent space model software is in the latentnet package (more recent models are provided by the amen package).
library(latentnet)
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 2))
runtime=Sys.time()-start.time;
runtime;
summary(latent.fit)
plot(latent.fit)
mcmc.diagnostics(latent.fit)
plot(gof(latent.fit))
# Can add additional dimensions...
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 3))
runtime=Sys.time()-start.time;
runtime;
plot(gof(latent.fit))
# ... latent groups ...
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 2, G = 5))
runtime=Sys.time()-start.time;
runtime;
plot(latent.fit)
plot(gof(latent.fit))
# ... or homophily effects
start.time <- Sys.time()
latent.fit <- ergmm(g ~ nodematch("grade") + nodematch("sex")+euclidean(d = 2))
runtime=Sys.time()-start.time;
runtime
summary(latent.fit)